# Time Series Analysis Laboratory with R
# Prof. Lea Petrella
# Faculty of Economics
# Department of Methods and Models for Economics, Territory and Finance

# Sapienza University of Rome
# a.y. 2019-2020
#####################################

#####################################
#Lesson 1: Introduction to R (Part 2)
#####################################
#Index:
  #
  #1)extraction and selection of elements
  #2)sorting in R
  #3) dataframe
  #4) data import
  #5) descriptive statistics
  #6) graphical representation
  #7) end of 2nd part ---> let's save everything!
########
# Recreate the objects from Part 1 that the following sections rely on.
v1 <- seq(1, 10)
v1

# A 5 x 4 matrix holding the integers 1..20, filled column by column.
matrix1 <- matrix(1:20, nrow = 5, ncol = 4)
matrix1

# Remember the paste() function: it builds the labels "column-1", ..., "column-4".
colnames(matrix1) <- paste("column", 1:4, sep = "-")

# Same thing for the rows: "row-1", ..., "row-5".
rownames(matrix1) <- paste("row", 1:5, sep = "-")
matrix1

height <- c(1.75, 1.80, 1.65, 1.90, 1.80, 1.71)
weight <- c(60, 72, 57, 90, 82, 72)
bmi <- weight / height^2 # Body Mass Index (element-wise on the two vectors)

#####################################
#1)extraction and selection of elements
#####################################

# 1. based on location

# Extraction from a vector
v1

# the 4th element of v1
v1[4]

# extract the elements in positions 1, 3 and 6
v1[c(1, 3, 6)]

# extract the elements from the 1st to the 3rd position
v1[c(1, 2, 3)]

# or, equivalently
v1[1:3]

# It is possible to extract the same element several times:
# here the 1st element three times, followed by the 3rd element
v1[c(rep(1, 3), 3)]

# Elements can also be selected by negation (everything except positions 2 and 4)
v1[-c(2, 4)]

# Extraction from a matrix object
matrix1

# extract the element in row 2, column 3 of matrix1
matrix1[2, 3]

# extract the 2nd row of matrix1 (note the comma: rows come first, columns second)
line2 <- matrix1[2, ]
line2

# remove the names from the extracted row
names(line2) <- NULL
line2

# extract column 3 of matrix1
column3 <- matrix1[, 3]
column3
names(column3) <- NULL
column3

# extract the elements of rows 2 and 3 of column 4 of matrix1
matrix1[c(2, 3), 4]

# extract the elements of rows 2 and 3 of columns 1 and 4 of matrix1
matrix1[c(2, 3), c(1, 4)]

# extract the elements located in (2,1) and (3,4) of matrix1:
# the index matrix is filled column-wise, so each of its rows is a (row, column) pair
matrix1[matrix(c(2, 3, 1, 4), nrow = 2, ncol = 2)]

# extract the "head" (the first rows) of a matrix
head(matrix(1:50, nrow = 10, ncol = 5))


# 2. by label

# It is possible to extract elements from a matrix by using row and column names
# ... of course the matrix must have row and column names!

matrix1["row-2", "column-3"]
matrix1["row-2", ]
matrix1[, "column-3"]


# 3. based on a "logical" condition

height
# heights are stored as values such as 1.75, so the threshold is 1.80 (not 180)
height >= 1.80
height[height >= 1.80]
height[height > 1.75 & height < 1.90]
height[height > 1.75 & height <= 1.90]

matrix1
matrix1 > 8
major8 <- matrix1[matrix1 > 8] # returns a vector!

#####################################
#2)sorting in R
#####################################

# sort() returns the vector with its elements in increasing order

sort(height)

# decreasing = TRUE gives the reverse ordering
# (TRUE is spelled out because the shorthand T is an ordinary, reassignable variable)
sort(height, decreasing = TRUE)

# order() returns the original positions of the elements, listed in sorted order,
# i.e. height[order(height)] gives the same result as sort(height)

order(height)

# rev() rearranges the vector by reversing the order of its elements

rev(height)

#####################################
#3)data.frame
#####################################

# Data frames are the main objects in statistical analysis.
# Classic format: units (rows) x variables (columns)

# How to create a data frame

df <- data.frame(height, weight) # Each row of this object represents a statistical unit

# How to create an identifier for each unit

id <- seq(1, 6, 1)
df1 <- data.frame(id, height, weight)

# How do you access the variables of a data frame?

# By location

df[2] # Returns the weight variable (as a one-column data frame)
df1[1] # Returns the variable id (as a one-column data frame)

# By label

df$weight # Returns the weight variable (as a vector)
df1$id # Returns the variable id (as a vector)

# Variable names in the data frame
names(df1) # check the names

# Removing variable names
names(df1) <- NULL

# How to change the names of the data frame variables

names.eng <- c("identity", "Height", "Weight")
names(df1) <- names.eng

# How to order a data frame with respect to a variable.
# From here on the columns are referred to by their new names, so that the
# code works on the data stored in df1 rather than on the global vectors.

df.ord <- df1[order(df1$Height), ] # sorts units with respect to the Height variable
df.ord1 <- df1[order(df1$Weight), ] # sorts units with respect to the Weight variable

# First descriptive statistics of a data frame
summary(df1[2:3])

# How to extract a subset of data using the subset() function

help(subset)
df1.subset <- subset(df1, Height >= 1.80)
df2.subset <- subset(df1, Height >= 1.80 & Weight > 80)
df3.subset <- subset(df1, Height >= 1.80 | Weight > 80)
# inside subset() the name `identity` refers to the column of df1,
# which is looked up before the base function of the same name
df4.sub <- subset(df1, identity > 3)


# In case of missing values

df5.sub <- subset(df1, !is.na(Weight))
df6.sub <- subset(df1, !is.na(Weight) & !is.na(Height))

# How to select the entire data frame by omitting all missing values:
# we can use the complete.cases() function

complete.cases(df1)
# the comma is required: the logical vector selects rows, all columns are kept
df1[complete.cases(df1), ]

######################################
# Save! (e.g. with save.image())
######################################